energy <- read_csv(here("data", "energy.csv"))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## month = col_character(),
## res_total = col_double(),
## ind_total = col_double()
## )
energy_ts <- energy %>%
mutate(date = yearmonth(month)) %>%
as_tsibble(key = NULL, index = date)
ggplot(energy_ts, aes(x = date, y = res_total)) +
geom_line() +
labs(y = "residential energy consumption \n (Trillion BTU)")
Trends: - increasing overall but slightly decreasing around 2005 - seasonality in peaks - no cyclicality or outliers
energy_ts %>%
gg_season(y= res_total) +
theme_minimal() +
labs( x = "month",
y= "residential energy consumption (trillion BTU)")
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="
Trends: - highest in winter months (dec, jan, feb) - peaks back up around jul and aug - increasing over time in general
energy_ts %>% gg_subseries(res_total)
Trends: - clear seasonailty with increasing trend overtime - peaks in dec, jan, feb confirmed
# Find STL decomposition
dcmp <- energy_ts %>%
model(STL(res_total ~ season()))
# View the components
components(dcmp)
## # A dable: 538 x 7 [1M]
## # Key: .model [1]
## # STL Decomposition: res_total = trend + season_year + remainder
## .model date res_total trend season_year remainder season_adjust
## <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 STL(res_total ~… 1973 Jan 1932. 1257. 684. -9.21 1248.
## 2 STL(res_total ~… 1973 Feb 1687. 1253. 465. -31.3 1222.
## 3 STL(res_total ~… 1973 Mar 1497. 1250. 242. 5.67 1255.
## 4 STL(res_total ~… 1973 Apr 1178. 1246. -62.9 -5.49 1241.
## 5 STL(res_total ~… 1973 May 1015. 1242. -256. 28.7 1271.
## 6 STL(res_total ~… 1973 Jun 934. 1238. -313. 8.89 1247.
## 7 STL(res_total ~… 1973 Jul 981. 1234. -231. -22.0 1212.
## 8 STL(res_total ~… 1973 Aug 1019. 1231. -225. 13.7 1244.
## 9 STL(res_total ~… 1973 Sep 957. 1227. -314. 43.5 1271.
## 10 STL(res_total ~… 1973 Oct 992. 1224. -271. 39.4 1263.
## # … with 528 more rows
# Visualize the decomposed components
components(dcmp) %>% autoplot() +
theme_minimal()
## Autocorrelation function (ACF)
energy_ts %>%
ACF(res_total) %>%
autoplot()
Takeaway: - observations separated by 12 months are the most highly correlated, reflecting strong seasonality we see in all of our other exploratory visualizations.
We’ll use multiplicative due to changes in variance over time
# Create the model:
energy_fit <- energy_ts %>%
model(ets = ETS(res_total ~ season("M")))
# Forecast using the model 10 years into the future:
energy_forecast <- energy_fit %>%
forecast(h = "10 years")
# Plot just the forecasted values (with 80 & 95% CIs):
energy_forecast %>%
autoplot()
# Or plot it added to the original data:
energy_forecast %>%
autoplot(energy_ts)
## Assessing residuals
# Append the predicted values (and residuals) to original energy data
energy_predicted <- broom::augment(energy_fit)
# Use View(energy_predicted) to see the resulting data frame
Now, plot the actual energy values (res_total), and the predicted values (stored as .fitted) atop them:
ggplot(data = energy_predicted) +
geom_line(aes(x = date, y = res_total)) +
geom_line(aes(x = date, y = .fitted), color = "red")
## Explore the residuals
ggplot(data = energy_predicted, aes(x = .resid)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Fit 3 different forecasting models (ETS, ARIMA, SNAIVE):
energy_fit_multi <- energy_ts %>%
model(
ets = ETS(res_total ~ season("M")),
arima = ARIMA(res_total),
snaive = SNAIVE(res_total)
)
# Forecast 3 years into the future (from data end date)
multi_forecast <- energy_fit_multi %>%
forecast(h = "3 years")
# Plot the 3 forecasts
multi_forecast %>%
autoplot(energy_ts)
# Or just view the forecasts (note the similarity across models):
multi_forecast %>%
autoplot()
A. California county outlines (polygons)
ca_counties <- read_sf(here("data","ca_counties","CA_Counties_TIGER2016.shp"))
bit of wrangling
ca_subset <- ca_counties %>%
select(NAME, ALAND) %>%
rename(county_name = NAME, land_area = ALAND)
Use st_crs() to check the existing CRS for spatial data. We see that this CRS is WGS84 (epsg: 3857).
ca_subset %>% st_crs()
## Coordinate Reference System:
## User input: WGS 84 / Pseudo-Mercator
## wkt:
## PROJCRS["WGS 84 / Pseudo-Mercator",
## BASEGEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]],
## CONVERSION["Popular Visualisation Pseudo-Mercator",
## METHOD["Popular Visualisation Pseudo Mercator",
## ID["EPSG",1024]],
## PARAMETER["Latitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["False easting",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["World - 85°S to 85°N"],
## BBOX[-85.06,-180,85.06,180]],
## ID["EPSG",3857]]
ggplot(data = ca_subset) +
geom_sf(aes(fill = land_area), color = "white", size = 0.1) +
theme_void() +
scale_fill_gradientn(colors = c("cyan","blue","purple"))
B. Invasive red sesbania records (spatial points)
sesbania <- read_sf(here("data","red_sesbania","ds80.shp"))
# Check the CRS:
sesbania %>% st_crs()
## Coordinate Reference System:
## User input: Custom
## wkt:
## PROJCRS["Custom",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]],
## ID["EPSG",6269]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["unnamed",
## METHOD["Albers Equal Area",
## ID["EPSG",9822]],
## PARAMETER["Latitude of false origin",0,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-120,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",34,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.5,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",-4000000,
## LENGTHUNIT["metre",1],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]]
## Notice that this CRS is different from the California counties CRS, so we’ll want to update it to match. Use st_transform() to update the CRS:
sesbania <- st_transform(sesbania, 3857)
# Then check it:
sesbania %>% st_crs()
## Coordinate Reference System:
## User input: EPSG:3857
## wkt:
## PROJCRS["WGS 84 / Pseudo-Mercator",
## BASEGEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]],
## CONVERSION["Popular Visualisation Pseudo-Mercator",
## METHOD["Popular Visualisation Pseudo Mercator",
## ID["EPSG",1024]],
## PARAMETER["Latitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["False easting",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["World - 85°S to 85°N"],
## BBOX[-85.06,-180,85.06,180]],
## ID["EPSG",3857]]
## now they have the same CRS.
ggplot() +
geom_sf(data = ca_subset) +
geom_sf(data = sesbania, size = 1, color = "red")
## more wrangling - say we want to find the count of red sesbania observed locations in this dataset by county. - How can I go about joining these data so that I can find counts? Don’t worry…st_join() has it covered
ca_sesbania <- ca_subset %>%
st_join(sesbania)
# And then we can find counts (note: these are not counts for individual plants, but by record in the dataset) by county:
sesbania_counts <- ca_sesbania %>%
count(county_name)
# Then we can plot a chloropleth using the number of records for red sesbania as the fill color (instead of what we used previously, land area):
ggplot(data = sesbania_counts) +
geom_sf(aes(fill = n), color = "white", size = 0.1) +
scale_fill_gradientn(colors = c("lightgray","orange","red")) +
theme_minimal() +
labs(fill = "Number of S. punicea records")
## Only plot the county with the greatest number of red sesbania records (Solano), and make a map of those locations:
# Subset of sesbania point locations only in Solano County
solano_sesbania <- sesbania %>%
filter(COUNTY == "Solano")
# Only keep Solano polygon from California County data
solano <- ca_subset %>%
filter(county_name == "Solano")
ggplot() +
geom_sf(data = solano) +
geom_sf(data = solano_sesbania)
## Make an interactive map with {tmap}
# Set the viewing mode to "interactive":
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# Then make a map (with the polygon fill color updated by variable 'land_area', updating the color palette to "BuGn"), then add another shape layer for the sesbania records (added as dots):
tm_shape(ca_subset) +
tm_fill("land_area", palette = "BuGn") +
tm_shape(sesbania) +
tm_dots()
For extra resources see: - https://cran.r-project.org/web/packages/tmap/vignettes/tmap-getstarted.html - https://geocompr.robinlovelace.net/adv-map.html#interactive-maps